# Load the full training set and the list of ids to keep. The id file's
# column is renamed to "id" so it can serve as the merge key.
df_train <- readRDS("train.rds")
ids <- setNames(read.csv("good_274_ids.csv"), "id")

# Reference values recorded from an earlier interactive session:
#   mean(df_train$loss)                   -> 0.7995847
#   mean(df_train$loss[df_train$loss>0])  -> 8.620362

# Keep only the training rows whose id appears in the id list.
df_subset1 <- merge(df_train, ids, by = "id")

# Replace every missing entry of `x` with the mean of its observed entries.
#   x: a vector, possibly containing NA values.
# Returns `x` with the NA positions overwritten by that mean; if nothing is
# observed, the mean of an empty vector (NaN) is what gets written.
mean_impute <- function(x) {
  missing <- is.na(x)
  x[missing] <- mean(x[!missing])
  x
}

# Fill missing values column by column with the column mean.
df_subset1 <- data.frame(lapply(df_subset1, mean_impute))

# Drop columns that carry no variation. A column whose standard deviation is
# NA (for instance one with no observed values at all) is dropped as well:
# an NA inside a logical column mask is rejected by `[.data.frame`, so the
# mask must be NA-free.
col_sd <- vapply(df_subset1, sd, numeric(1))
is_varying <- !is.na(col_sd) & col_sd > 0
# drop = FALSE keeps a data frame even if a single column survives.
df_subset1 <- df_subset1[, is_varying, drop = FALSE]

# Remove a fixed list of features from the analysis.
excluded_features <- c(
  "f116", "f117", "f118", "f126", "f127", "f128",
  "f453", "f454", "f474", "f96", "f97", "f98", "f12"
)
nms <- setdiff(names(df_subset1), excluded_features)
df_subset1 <- df_subset1[, nms, drop = FALSE]

# For every column of `df` except the first, select the rows lying in one
# tail of that column and compute the share of those rows with loss > 0.
#   df   - data frame with a `loss` column
#   prob - quantile level used as the per-column cut-off
#   tail - "upper" selects x >= cut-off, "lower" selects x <= cut-off
# Returns a named numeric vector sorted from highest to lowest share.
tail_loss_rate <- function(df, prob, tail = c("upper", "lower")) {
  tail <- match.arg(tail)
  # The column range is taken from the data instead of fixed indices so it
  # stays valid after columns have been dropped. The first column is skipped
  # (presumably the id merge key -- confirm).
  candidates <- df[-1]
  rates <- vapply(candidates, function(x) {
    cutoff <- quantile(x, prob, na.rm = TRUE)
    picked <- if (tail == "upper") x >= cutoff else x <= cutoff
    mean(df$loss[picked] > 0)
  }, numeric(1))
  sort(rates, decreasing = TRUE)
}

# Columns whose top 0.1% of values most often coincide with a positive loss.
# The top-ranked entry is skipped by the [2:50] index.
tail_loss_rate(df_subset1, 0.999, "upper")[2:50]

# Narrow down to rows with f244 >= 50 and repeat with the upper 40%.
df_subset2 <- df_subset1[df_subset1$f244 >= 50, ]

tail_loss_rate(df_subset2, 0.6, "upper")[2:15]

df_subset3 <- subset(df_subset2, f778 > 49)

# Lower-tail scan (bottom 5%).
# NOTE(review): this runs on df_subset1, not on the narrowed subsets --
# confirm that is intended.
tail_loss_rate(df_subset1, 0.05, "lower")[2:15]

# Build the submission file: ids matching the threshold rule get a constant
# loss of 3, every other test id gets a loss of 0.
test_ids <- read.csv("test_ids.csv")
df_test <- readRDS("test.rds")
df_test_subset1 <- merge(df_test, test_ids, by = "id")

# Rows where either feature is NA produce an NA id here; na.omit() removes
# them, so such rows fall through to the zero-loss group.
# NOTE(review): the training-side filter uses f244 >= 50 while this uses
# f244 > 50 -- confirm which boundary is intended.
rule_hit <- (df_test_subset1$f244 > 50) & (df_test_subset1$f778 > 49)
submission_ids <- unique(na.omit(df_test_subset1$id[rule_hit]))

id_zero <- setdiff(df_test$id, submission_ids)

# rep() sizes the loss column to the id vector, so an empty group yields a
# zero-row data frame instead of a length-mismatch error.
submission <- rbind(
  data.frame(id = id_zero, loss = rep(0, length(id_zero))),
  data.frame(id = submission_ids, loss = rep(3, length(submission_ids)))
)
submission <- submission[order(submission$id), ]
write.csv(submission, "submission.csv", row.names = FALSE)




